#Nature's Kidneys: the Role of Wetland Reserve Easements in Restoring Water Quality
#Nicole Karwowski and Marin Skidmore
#Replication File
#2025

#CDL data for the MARB
#2008-2018

#load packages
#load libraries
library(ggplot2)
library(raster)
library(sf)
library(exactextractr)
library(Rcpp)
library(data.table)
library(tidyverse)
library(Hmisc)
library(haven)
library(stargazer)
library(purrr)
library(tidyr)
library(collapse)
library(ggthemes)
library(scales)
library(tigris)
options(tigris_use_cache = TRUE)
library(mapview)
options(scipen=999)
library(stringr)
library(data.table)
library(lubridate)
library(units)
library(geosphere)
library(plm)
library(lmtest)
library(sandwich)
library(fixest)
library(did)
library(stargazer)
library(naniar)
library(scales)
library(terra)
library(CropScapeR) 
library(reshape2)
library(mapview)
library(leaflet)

#Help files
#https://tmieno2.github.io/R-as-GIS-for-Economists/CropScapeR.html#data-processing-after-downloading-data
#https://cran.r-project.org/web/packages/exactextractr/vignettes/vig2_categorical.html 
#CDL Land Cover HUC 12 in Easements Project 

################################################################################

#open HUC 12 shapefile data
#HUC 12
setwd("C:/Users/16308/OneDrive - Montana State University/UW_Madison/Water quality/Replication Package")

MARB<-st_read("Data/WatershedBoundaries/Processed/MARB/MARB.shp")
MARBkey<-subset(as.data.frame(MARB), select=-c(geometry, areaacres, shape_area, tohuc))

#Extract CDL pixels for each huc12 in each year
#CDL
#2018
cdl2018<-raster("Data/CDL/Raw/2018_30m_cdls/2018_30m_cdls.tif")

#make the MARB sf the same crs as the CDL databases
MARB<-st_transform(MARB, crs=st_crs(cdl2018))

#extract land cover in 2018
landcov_huc_2018 <- exact_extract(cdl2018, MARB, function(df) {
  df %>%
    mutate(frac_total = coverage_fraction / sum(coverage_fraction)) %>%
    dplyr::group_by(ID, value) %>%
    dplyr::summarize(freq = sum(frac_total))
}, summarize_df = TRUE, include_cols = c('ID', 'huc12'), progress = TRUE)

list(landcov_huc_2018)


#save it
save(landcov_huc_2018, file="Data/CDL/Processed/yearly_huc12/Landcov_huc_2018.RData")

#2017
cdl2017<-raster("Data/CDL/Raw/2017_30m_cdls/2017_30m_cdls.tif")

#extract land cover in 2017
landcov_huc_2017 <- exact_extract(cdl2017, MARB, function(df) {
  df %>%
    mutate(frac_total = coverage_fraction / sum(coverage_fraction)) %>%
    dplyr::group_by(ID, value) %>%
    dplyr::summarize(freq = sum(frac_total))
}, summarize_df = TRUE, include_cols = c('ID', 'huc12'), progress = TRUE)

list(landcov_huc_2017)

save(landcov_huc_2017, file="Data/CDL/Processed/yearly_huc12/Landcov_huc_2017.RData")

#2016
cdl2016<-raster("Data/CDL/Raw/2016_30m_cdls/2016_30m_cdls.tif")

landcov_huc_2016 <- exact_extract(cdl2016, MARB, function(df) {
  df %>%
    mutate(frac_total = coverage_fraction / sum(coverage_fraction)) %>%
    dplyr::group_by(ID, value) %>%
    dplyr::summarize(freq = sum(frac_total))
}, summarize_df = TRUE, include_cols = c('ID', 'huc12'), progress = TRUE)

list(landcov_huc_2016)

save(landcov_huc_2016, file="Data/CDL/Processed/yearly_huc12/Landcov_huc_2016.RData")

#2015
cdl2015<-raster("Data/CDL/Raw/2015_30m_cdls/2015_30m_cdls.tif")

landcov_huc_2015 <- exact_extract(cdl2015, MARB, function(df) {
  df %>%
    mutate(frac_total = coverage_fraction / sum(coverage_fraction)) %>%
    dplyr::group_by(ID, value) %>%
    dplyr::summarize(freq = sum(frac_total))
}, summarize_df = TRUE, include_cols = c('ID', 'huc12'), progress = TRUE)

list(landcov_huc_2015)

save(landcov_huc_2015, file="Data/CDL/Processed/yearly_huc12/Landcov_huc_2015.RData")

#2014
cdl2014<-raster("Data/CDL/Raw/2014_30m_cdls/2014_30m_cdls.tif")

landcov_huc_2014 <- exact_extract(cdl2014, MARB, function(df) {
  df %>%
    mutate(frac_total = coverage_fraction / sum(coverage_fraction)) %>%
    dplyr::group_by(ID, value) %>%
    dplyr::summarize(freq = sum(frac_total))
}, summarize_df = TRUE, include_cols = c('ID', 'huc12'), progress = TRUE)

list(landcov_huc_2014)

save(landcov_huc_2014, file="Data/CDL/Processed/yearly_huc12/Landcov_huc_2014.RData")


#2013
cdl2013<-raster("Data/CDL/Raw/2013_30m_cdls/2013_30m_cdls.tif")

landcov_huc_2013 <- exact_extract(cdl2013, MARB, function(df) {
  df %>%
    mutate(frac_total = coverage_fraction / sum(coverage_fraction)) %>%
    dplyr::group_by(ID, value) %>%
    dplyr::summarize(freq = sum(frac_total))
}, summarize_df = TRUE, include_cols = c('ID', 'huc12'), progress = TRUE)

list(landcov_huc_2013)

save(landcov_huc_2013, file="Data/CDL/Processed/yearly_huc12/Landcov_huc_2013.RData")

#2012
cdl2012<-raster("Data/CDL/Raw/2012_30m_cdls/2012_30m_cdls.tif")

landcov_huc_2012 <- exact_extract(cdl2012, MARB, function(df) {
  df %>%
    mutate(frac_total = coverage_fraction / sum(coverage_fraction)) %>%
    dplyr::group_by(ID, value) %>%
    dplyr::summarize(freq = sum(frac_total))
}, summarize_df = TRUE, include_cols = c('ID', 'huc12'), progress = TRUE)

list(landcov_huc_2012)

save(landcov_huc_2012, file="Data/CDL/Processed/yearly_huc12/Landcov_huc_2012.RData")

#2011
cdl2011<-raster("Data/CDL/Raw/2011_30m_cdls/2011_30m_cdls.tif")

landcov_huc_2011 <- exact_extract(cdl2011, MARB, function(df) {
  df %>%
    mutate(frac_total = coverage_fraction / sum(coverage_fraction)) %>%
    dplyr::group_by(ID, value) %>%
    dplyr::summarize(freq = sum(frac_total))
}, summarize_df = TRUE, include_cols = c('ID', 'huc12'), progress = TRUE)

list(landcov_huc_2011)

save(landcov_huc_2011, file="Data/CDL/Processed/yearly_huc12/Landcov_huc_2011.RData")

#2010
cdl2010<-raster("Data/CDL/Raw/2010_30m_cdls/2010_30m_cdls.tif")

landcov_huc_2010 <- exact_extract(cdl2010, MARB, function(df) {
  df %>%
    mutate(frac_total = coverage_fraction / sum(coverage_fraction)) %>%
    dplyr::group_by(ID, value) %>%
    dplyr::summarize(freq = sum(frac_total))
}, summarize_df = TRUE, include_cols = c('ID', 'huc12'), progress = TRUE)

list(landcov_huc_2010)

save(landcov_huc_2010, file="Data/CDL/Processed/yearly_huc12/Landcov_huc_2010.RData")

#2009
cdl2009<-raster("Data/CDL/Raw/2009_30m_cdls/2009_30m_cdls.tif")

landcov_huc_2009 <- exact_extract(cdl2009, MARB, function(df) {
  df %>%
    mutate(frac_total = coverage_fraction / sum(coverage_fraction)) %>%
    dplyr::group_by(ID, value) %>%
    dplyr::summarize(freq = sum(frac_total))
}, summarize_df = TRUE, include_cols = c('ID', 'huc12'), progress = TRUE)

list(landcov_huc_2009)

save(landcov_huc_2009, file="Data/CDL/Processed/yearly_huc12/Landcov_huc_2009.RData")

#2008
cdl2008<-raster("Data/CDL/Raw/2008_30m_cdls/2008_30m_cdls.tif")

landcov_huc_2008 <- exact_extract(cdl2008, MARB, function(df) {
  df %>%
    mutate(frac_total = coverage_fraction / sum(coverage_fraction)) %>%
    dplyr::group_by(ID, value) %>%
    dplyr::summarize(freq = sum(frac_total))
}, summarize_df = TRUE, include_cols = c('ID', 'huc12'), progress = TRUE)

list(landcov_huc_2008)

save(landcov_huc_2008, file="Data/CDL/Processed/yearly_huc12/Landcov_huc_2008.RData")

####################################################################################
#open data and transform to panel
load("Data/CDL/Processed/yearly_huc12/Landcov_huc_2018.RData")
load("Data/CDL/Processed/yearly_huc12/Landcov_huc_2017.RData")
load("Data/CDL/Processed/yearly_huc12/Landcov_huc_2016.RData")
load("Data/CDL/Processed/yearly_huc12/Landcov_huc_2015.RData")
load("Data/CDL/Processed/yearly_huc12/Landcov_huc_2014.RData")
load("Data/CDL/Processed/yearly_huc12/Landcov_huc_2013.RData")
load("Data/CDL/Processed/yearly_huc12/Landcov_huc_2012.RData")
load("Data/CDL/Processed/yearly_huc12/Landcov_huc_2011.RData")
load("Data/CDL/Processed/yearly_huc12/Landcov_huc_2010.RData")
load("Data/CDL/Processed/yearly_huc12/Landcov_huc_2009.RData")
load("Data/CDL/Processed/yearly_huc12/Landcov_huc_2008.RData")

#add the year
landcov_huc_2018$year<-2018
landcov_huc_2017$year<-2017
landcov_huc_2016$year<-2016
landcov_huc_2015$year<-2015
landcov_huc_2014$year<-2014
landcov_huc_2013$year<-2013
landcov_huc_2012$year<-2012
landcov_huc_2011$year<-2011
landcov_huc_2010$year<-2010
landcov_huc_2009$year<-2009
landcov_huc_2008$year<-2008

#merge all the data
landcov_huc<-dplyr::bind_rows(landcov_huc_2008, landcov_huc_2009, landcov_huc_2010,
                              landcov_huc_2011,landcov_huc_2012,landcov_huc_2013,
                              landcov_huc_2014, landcov_huc_2015, landcov_huc_2016, 
                              landcov_huc_2017, landcov_huc_2018)
rm(landcov_huc_2008)
rm(landcov_huc_2009)
rm(landcov_huc_2010)
rm(landcov_huc_2011)
rm(landcov_huc_2012)
rm(landcov_huc_2013)
rm(landcov_huc_2014)
rm(landcov_huc_2015)
rm(landcov_huc_2016)
rm(landcov_huc_2017)
rm(landcov_huc_2018)

data(linkdata)

landcov_huc<-dplyr::left_join(landcov_huc, linkdata, by=c('value'='MasterCat'))
list(landcov_huc)
table(landcov_huc$Crop)


landcov_huc<-dplyr::left_join(landcov_huc, MARBkey, by=c('ID'='ID'))

landcov_huc<- landcov_huc %>% relocate(huc12, .before=ID)
landcov_huc<- landcov_huc %>% relocate(year, .before=huc12)
landcov_huc<- landcov_huc %>% relocate(Crop, .after=value)

#group the categories
table(landcov_huc$Crop, useNA="always")

landcov_huc$cover<-landcov_huc$Crop
landcov_huc$cover<-ifelse(landcov_huc$Crop %in% c("Deciduous_Forest", 
                          "Evergreen_Forest", "Mixed_Forest"), "Forest", 
                          landcov_huc$cover)

landcov_huc$cover<-ifelse(landcov_huc$Crop %in% c("Developed/High_Intensity", 
                          "Developed/Low_Intensity",
                          "Developed/Med_Intensity", "Developed/Open_Space"), 
                          "Developed", landcov_huc$cover)

landcov_huc$cover<-ifelse(landcov_huc$Crop %in% c("Dbl_Crop_Barley/Corn", "Dbl_Crop_Barley/Sorghum", "Dbl_Crop_Barley/Soybeans",
                                                  "Dbl_Crop_Corn/Soybeans", "Dbl_Crop_Oats/Corn", "Dbl_Crop_Soybeans/Oats",
                                                  "Dbl_Crop_WinWht/Corn", "Dbl_Crop_WinWht/Sorghum", "Dbl_Crop_WinWht/Soybeans",
                                                  "Dbl_Crop_Soybeans/Cotton", "Dbl_Crop_WinWht/Cotton"), 
                          "Double", landcov_huc$cover)

landcov_huc$cover<-ifelse(landcov_huc$Crop %in% c("Barley", "Oats", "Winter_Wheat", "Durum_Wheat", "Spring_Wheat", "Rice",
                                                  "Millet", "Rye", "Sorghum", "Speltz", "Triticale", "Other_Small_Grains"), "Grain", landcov_huc$cover)

landcov_huc$cover<-ifelse(landcov_huc$Crop %in% c("Alfalfa", "Clover/Wildflowers", "Other_Hay/Non_Alfalfa", "Sod/Grass_Seed",
                                                  "Switchgrass", "Mustard"), "Forage", landcov_huc$cover )

landcov_huc$cover<-ifelse(landcov_huc$Crop %in% c("Almonds", "Apricots", "Apples", "Aquaculture", "Asparagus", "Blueberries", "Broccoli", "Buckwheat",
                                                  "Cabbage", "Camelina", "Caneberries", "Canola", "Cantaloupes", "Carrots", "Celery", "Citrus",
                                                  "Cauliflower", "Cherries", "Christmas_Trees", "Cranberries", "Cucumbers",
                                                  "Dry_Beans", "Eggplants", "Flaxseed", "Garlic", "Gourds", "Grapes", "Greens", "Herbs", "Hops",
                                                  "Mint", "Misc_Vegs_&_Fruits", "Onions", "Other_Crops", "Other_Tree_Crops",
                                                  "Pears", "Peas", "Peppers", "Pop_or_Orn_Corn", "Potatoes", "Pumpkins", "Radishes",
                                                  "Safflower", "Squash", "Strawberries", "Sugarbeets", "Sunflower", "Sweet_Corn",
                                                  "Sweet_Potatoes", "Tobacco", "Tomatoes", "Turnips", "Vetch", "Walnuts", "Watermelons",
                                                  "Lentils", "Lettuce", "Nectarines", "Olives", "Oranges", "Peaches", "Peanuts",
                                                  "Pecans", "Pistachios", "Plums", "Rape_Seed", "Sugarcane"),
                          "OtherCrops", landcov_huc$cover)

landcov_huc$cover<-ifelse(landcov_huc$Crop=="Fallow/Idle_Cropland", "Fallow", landcov_huc$cover)

landcov_huc$cover<-ifelse(landcov_huc$Crop=="Perennial_Ice/Snow_", "Ice_Snow", landcov_huc$cover)

landcov_huc$cover<-ifelse(landcov_huc$Crop=="Grassland/Pasture", "Grassland_Pasture", landcov_huc$cover)

table(landcov_huc$cover)

#aggregate by cover
landcov_agg<-aggregate(landcov_huc$freq, by=list(landcov_huc$huc12, landcov_huc$year, landcov_huc$cover), FUN=sum)

summary(landcov_agg)
names(landcov_agg)<-c("huc12", "year", "cover", "proportion")

#turn from long to wide
landcov_agg_wide<-dcast(landcov_agg, huc12 + year ~ cover, value.var="proportion")
summary(landcov_agg_wide)

#if its NA, replace with 0. 
landcov_agg_wide<-landcov_agg_wide %>% replace(is.na(.), 0)
summary(landcov_agg_wide)

cdl_huc12<-landcov_agg_wide

#save the data
save(cdl_huc12, file="Data/CDL/Processed/cdl_landcover_huc12_year_panel.RData")
